/*******************************************************************************
* Who Becomes a Teacher? Experimental Evidence from an Information Intervention

* Purpose: Create main tables/figures for the paper.

* Created by: Antonella Rivera
* Created on: 21/09/2024
* Last modified on: 
* Last modified by: 
* Edits history:

*******************************************************************************/

********************************************************************************
* Set up
********************************************************************************
* Reset the session: drop data/programs/matrices, disable paging, and close
* any log left open by a previous run.
clear all
set more off
capture log close

* Root of the replication package. `clear all' does not drop global macros,
* so a user may also define $directory before running this file.
if "`c(username)'" == "ANTONELLAR" {
	global directory "C:/Users/ANTONELLAR/Dropbox/TeachersPeru/7. Text/International Journal of Educational Development/replication_package/analysis"
}

* Fail early with a clear message instead of letting every path below
* silently resolve to "/inputs", "/outputs" and "/temp".
if `"$directory"' == "" {
	display as error "global directory is not set: define it as the path to the analysis folder of the replication package"
	exit 198
}

* Sub-folders used throughout the file
global inputs  "$directory/inputs"
global results "$directory/outputs"
global temp    "$directory/temp"

********************************************************************************
* Table 1: Sample Description
********************************************************************************
* Estimations of statistics
********************************************************************************
* Builds the sample-description table and writes it to
* $results/Tables/balance_table.tex.
*   Columns 1-3: control mean, treatment mean, p-value of the treatment
*                coefficient.
*   Columns 4-6: mean without intention to teach, mean with intention to
*                teach, p-value of the int_teach coefficient.
* Means are read off an areg with strata absorbed, school_admin as a covariate
* and standard errors clustered on cod_mod: _b[_cons] for the reference group,
* _b[_cons] plus the coefficient for the other group.
use "$inputs/experiment_data.dta", clear

local sample_desc "age female ses failed_class gpa held_back Pmath_std_c Preading_std_c Piq_std_c Pvocab_std_c Pextroversion_std_c Pagreeableness_std_c Pconscient_std_c Pneuro_std_c Popen_std_c Pempat_std_c Ppatien_std_c Pavers_std_c Pfirme_std_c int_teach pub univ s_costs_std_i s_job_quality_std_i s_interests_std_i s_society_std_i knows_changes changes_positive gov_changes"
local sample_desc2 "age female ses failed_class gpa held_back Pmath_std_i Preading_std_i Piq_std_i Pvocab_std_i Pextroversion_std_i Pagreeableness_std_i Pconscient_std_i Pneuro_std_i Popen_std_i Pempat_std_i Ppatien_std_i Pavers_std_i Pfirme_std_i int_teach pub univ s_costs_std_i s_job_quality_std_i s_interests_std_i s_society_std_i knows_changes changes_positive gov_changes"
local education	"patience vocation family easy scholarships prestige wages"

* One row per variable plus a last row holding the group sizes. Derived from
* the lists so the matrix cannot drift out of sync with them.
* NOTE: the insobs positions in the LaTeX section below still assume the
* current list lengths (6 / 4 / 9 / 7 / 3 / 7 variables per panel).
local n_vars : word count `sample_desc' `education'
local n_rows = `n_vars' + 1

mat define tmat = J(`n_rows', 6, .)

* Columns 1 to 3 (Control, Treatment, p-value)
* The `education' variables are not part of this loop, so their rows stay
* missing in columns 1-3.
local i = 1
foreach var in `sample_desc' {

	qui areg `var' treatment school_admin, a(strata) cluster(cod_mod)

	matrix tmat[`i', 1] = _b[_cons]
	matrix tmat[`i', 2] = _b[_cons] + _b[treatment]
	matrix tmat[`i', 3] = r(table)[4,1]	// row 4 of r(table) = p-value, column 1 = treatment

	local ++i

}

* Columns 4 to 6 (Intention to teach: No, Yes, p-value)
local i = 1
foreach var in `sample_desc2' `education' {

	qui areg `var' int_teach school_admin, a(strata) cluster(cod_mod)

	matrix tmat[`i', 4] = _b[_cons]
	matrix tmat[`i', 5] = _b[_cons] + _b[int_teach]
	matrix tmat[`i', 6] = r(table)[4,1]

	local ++i

}

* Observations
qui count if treatment == 0
local NC = r(N)
qui count if treatment == 1
local NT = r(N)
qui count if int_teach == 0
local NI = r(N)
qui count if int_teach == 1
local YI = r(N)

matrix tmat[`n_rows',1] = `NC'
matrix tmat[`n_rows',2] = `NT'
matrix tmat[`n_rows',4] = `NI'
matrix tmat[`n_rows',5] = `YI'

* Format table
********************************************************************************
global names "`sample_desc' `education'"

* Column 4 holds the int_teach == 0 group and column 5 the int_teach == 1
* group, so they are named No / Yes in that order.
matrix rownames tmat = $names N
matrix colnames tmat = Control Treatment "p-value" No Yes "p-value"
mat list tmat, format(%20.2fc)

* Move the matrix into the dataset as formatted strings
svmat tmat
local cols "Control Treatment pvalue No Yes Pvalue"
forvalues c = 1/6 {
	local nm : word `c' of `cols'
	gen `nm' = string(tmat`c', "%20.2fc")
}
drop tmat*

* Row labels come from the variable labels stored in the dataset
gen Variable = ""
local i = 1
foreach var in $names {
	local lab: variable label `var'
	replace Variable = "`lab'" in `i'
	local ++i
}
keep in 1/`n_rows'

label var Variable ""
label var No "No Intent to teach"
label var Yes "Intent to teach"
label var Pvalue "P-value of equality"
keep Variable Control Treatment pvalue No Yes Pvalue
order Variable Control Treatment pvalue No Yes Pvalue

replace Variable = "\midrule Observations" in `n_rows'
* int_teach regressed on itself is not informative: blank out that row
replace No = "." if Variable == "Intention to Teach (%)"
replace Yes = "." if Variable == "Intention to Teach (%)"
replace Pvalue = "." if Variable == "Intention to Teach (%)"
replace Pvalue = " " if Variable == "\midrule Observations"
replace pvalue = " " if Variable == "\midrule Observations"

* Collapse each row into one LaTeX line
egen latex = concat(*), p("&")
replace latex = latex + "\\"
keep latex

* Header rows and panel titles. Positions are absolute row numbers counted
* after all earlier insertions.
insobs 3, before(1)
replace latex = "\toprule & & & & \multicolumn{3}{c}{Intention to teach} \\" in 1
replace latex = "& Control & Treatment & p-value & No & Yes & p-value \\" in 2
replace latex = "\midrule \multicolumn{7}{c}{A. Socio-demographic Characteristics} \\ \midrule" in 3

insobs 2, before(10)
replace latex = "\midrule \multicolumn{7}{c}{B. Standardized Test Scores} \\" in 10
replace latex = "\midrule \multicolumn{7}{c}{Cognitive} \\" in 11

insobs 1, before(16)
replace latex = "\multicolumn{7}{c}{Non-Cognitive} \\" in 16

insobs 1, before(26)
replace latex = "\midrule \multicolumn{7}{c}{C. Career Choice} \\ \midrule" in 26

insobs 1, before(34)
replace latex = "\midrule \multicolumn{7}{c}{D. Changes in Teaching Career} \\ \midrule" in 34

insobs 1, before(38)
replace latex = "\midrule \multicolumn{7}{c}{E. Reasons to Study (or not) Education} \\ \midrule" in 38

insobs 2, after(46)
replace latex = "\bottomrule & & & & & & \\" in 47
replace latex = "\end{tabular}" in 48

insobs 1, before(1)
replace latex = "\begin{tabular}{lcccccc}" in 1

* Write the table line by line
cap file close myfile
file open myfile using "$results/Tables/balance_table.tex", write replace

forvalues i = 1/`=_N' {
	local t0 = latex[`i']
	file write myfile `"`t0'"' _n
}

file close myfile

********************************************************************************
* Figure 1: Percentage Agreement on Statements About the Public Teaching Career
********************************************************************************
* Bar chart of the share of treated students agreeing with each statement
* before (q0*) and after (q1*), plus the share who consider studying education
* likely. Exported to $results/Figures/agree_statements.eps.
use "$inputs/experiment_data.dta", clear

* The midline survey was collected only among treated students
keep if treatment == 1

keep q11 q12 q13 q14 q15 q16 q01 q02 q03 q04 q05 q06 prob_educ_pre prob_educ_post

* Likelihood of studying education as a binary indicator: 1 when the answer is
* 3 or 4, 0 for other non-missing answers, missing when the answer is missing
gen q08 = prob_educ_pre == 3 | prob_educ_pre == 4 if prob_educ_pre != .
gen q18 = prob_educ_post == 3 | prob_educ_post == 4 if prob_educ_post != .
drop prob_educ_pre prob_educ_post

* Reshape to one row per student x statement; Q takes the values 1-6 and 8
gen id = _n

reshape long q0 q1, i(id) j(Q)
label var q0 "Before"
label var q1 "After"
label define Q 1 "Attractive Career" 2 "Self-Realization" 3 "Contribution to Society" 4 "Job Stability" 5 "Good Wages" 6 "Challenging and Creative Work" 8 "Likelihood of Studying Education"
label values Q Q

* Same number of observations before and after
replace q0 = . if q1 == .
replace q1 = . if q0 == .

* Test if differences are statistically significant
foreach k of numlist 1/6 8 {
	ttest q0 = q1 if Q == `k'
}

* Create an empty category (Q == 7) that draws a zero-height gap between the
* six statements and the likelihood bar. The rows are appended at the end of
* the data, whatever the sample size is.
insobs 5
replace Q = 7 if Q == .
replace q0 = 0 if Q == 7
replace q1 = 0 if Q == 7

* Graph
graph bar (mean) q0 q1, ///
	over(Q, label(labsize(2.5)) relabel(1 `""Attractive" "Career""' 2 `""Self" "Realization""' 3 `""Contribution" "to Society""' 4 `""Job" "Stability""' 5 `""Good" "Wages""' 6 `""Challenging and" "Creative Work""' 7 `""" """' 8 `""Likelihood of" "Studying Education""')) ///
	bar(1, bfcolor(navy)) bar(2, bfcolor(maroon)) ///
	bgcolor(white) graphregion(color(white) margin(vsmall)) ///
	ytitle("Percentage of Agreement") ///
	ylabel( .2 "20"  .4 "40" .6 "60" .8 "80" 1 "100") ///
	legend(order(1 "`: var label q0'" 2 "`: var label q1'"))

graph export "$results/Figures/agree_statements.eps", as(eps) preview(off) replace
 
********************************************************************************
* Table 2: Treatment Effects on Career Choice
********************************************************************************
* Treatment effects on three outcomes under four nested control sets, exported
* to $results/Tables/treatment_effects_rev.tex. Each outcome contributes a
* coefficient row, a standard-error row and a blank row; the last two columns
* show the number of observations and the control-group mean.
use "$inputs/experiment_data.dta", clear

* Data prep: regressors for specifications (1)-(4)
global reg1 treatment school_admin
global reg2 treatment school_admin age Pmath Preading Pvocab Ppatien
global reg3 treatment school_admin age Pmath Preading Pvocab Ppatien ses female
global reg4 treatment school_admin age Pmath Preading Pvocab Ppatien ses female teaching_career_before

* Companion m* variables added alongside each control set
* NOTE(review): presumably missing-value indicators for the controls; confirm
* against the data construction code.
global regb1
global regb2 mage mPmath mPreading mPvocab mPpatien
global regb3 mage mPmath mPreading mPvocab mPpatien mses mfemale
global regb4 mage mPmath mPreading mPvocab mPpatien mses mfemale mteaching_career_before

* Control-group means, formatted to exactly three decimals with a leading zero
* so they line up with the coefficient cells
qui su education if treatment == 0
local mu0 : display %5.3f r(mean)

qui su studied if treatment == 0
local mu1 : display %5.3f r(mean)

qui su education if int_enroll == 1 & treatment == 0
local mu2 : display %5.3f r(mean)

* One outreg2 file per outcome; specification 1 creates it, 2-4 append columns
forvalues i = 1/4 {
	if `i' == 1 local append = "replace"
	if `i' > 1  local append = "append"

	qui reg education ${reg`i'} ${regb`i'} i.strata, vce(cluster cod_mod)
	outreg2 using "$temp/longrun1.dta", `append' keep(treatment) dec(3) nocon nonot nor dta addtext(Mean, `mu0')

	qui reg studied ${reg`i'} ${regb`i'} i.strata, vce(cluster cod_mod)
	outreg2 using "$temp/longrun2.dta", `append' keep(treatment) dec(3) nocon nonot nor dta addtext(Mean, `mu1')

	qui reg education ${reg`i'} ${regb`i'} i.strata if int_enroll == 1, vce(cluster cod_mod)
	outreg2 using "$temp/longrun3.dta", `append' keep(treatment) dec(3) nocon nonot nor dta addtext(Mean, `mu2')
}

* Shift rows to columns
* prepforappend: reshapes the outreg2 dataset currently in memory and saves it
* back over the same file. It copies rows 7 and 8 of the first result column
* into new columns v6 and v7 and row 2 into v8 (the row-label column), keeps
* only rows 4-6, and drops v1.
capture program drop prepforappend
program define prepforappend
	forvalues j = 6/8 {
		gen v`j' = ""
	}
	replace v6 = v2[7] in 4
	replace v7 = v2[8] in 4
	replace v8 = v2[2] in 4
	drop if _n < 4 | _n >= 7
	drop v1
	order v8
	save, replace
end

forvalues i = 1/3 {
	use "$temp/longrun`i'_dta.dta", clear
	prepforappend
}

use "$temp/longrun1_dta.dta", clear
append using "$temp/longrun2_dta.dta"
append using "$temp/longrun3_dta.dta"

* Add a leading zero only where it is actually missing
replace v7 = "0" + v7 if substr(v7, 1, 1) == "."

replace v8 = "\midrule \multirow{2}{6cm}{Probability of studying an education-related major}" in 1
replace v8 = "\multirow{2}{6cm}{Probability of enrolling in tertiary education}" in 4
replace v8 = "\multirow{2}{6cm}{Probability of studying education (conditional on tertiary education intent)}" in 7

* Footer rows indicating which control sets enter each specification
insobs 3, after(9)

replace v8 = "\midrule Main Control Set" in 10
replace v2 = "No" in 10
replace v3 = "Yes" in 10
replace v4 = "Yes" in 10
replace v5 = "Yes" in 10

replace v8 = "Socio-Demographic Variables" in 11
replace v2 = "No" in 11
replace v3 = "No" in 11
replace v4 = "Yes" in 11
replace v5 = "Yes" in 11

replace v8 = "Opinion of Teaching Career" in 12
replace v2 = "No" in 12
replace v3 = "No" in 12
replace v4 = "No" in 12
replace v5 = "Yes" in 12

* Collapse each row into one LaTeX line
egen latex = concat(*), p("&")
replace latex = latex + "\\"
keep latex

insobs 2, after(12)

replace latex = "\bottomrule & & & & & & \\" in 13
replace latex = "\end{tabular}" in 14

insobs 3, before(1)
replace latex = "\begin{tabular}{lcccccc}" in 1
replace latex = "\toprule & (1) & (2) & (3) & (4) & Obs. & Control \\" in 2
replace latex = "& & & & & & Mean \\" in 3

* Write the table line by line
cap file close myfile
file open myfile using "$results/Tables/treatment_effects_rev.tex", write replace

forvalues i = 1/`=_N' {
	local t0 = latex[`i']
	file write myfile `"`t0'"' _n
}

file close myfile

********************************************************************************
* Table 3: Heterogeneity in Treatment Effects on Career Choice
********************************************************************************
* het_effects: heterogeneous treatment effects for one outcome.
*
*   Syntax:  het_effects outcome
*
* Loads the experiment data and, for each of seven moderators, regresses
* `outcome' on treatment, school_admin and strata dummies separately for
* high_<moderator> == 0 (group 1) and == 1 (group 2), clustering on cod_mod.
* Equality of the two treatment coefficients is tested with suest.
*
* On exit the data in memory has 14 rows, two per moderator in loop order:
*   OLS      group 1 coefficient with significance stars, then "(se)"
*   OLS2     group 2 coefficient with significance stars, then "(se)"
*   p_value  p-value of the equality test (coefficient row only)
*   order    row number, 1-14
* Stars: * p < 0.1, ** p < 0.05, *** p < 0.01.
capture program drop het_effects
program define het_effects
	args outcome

	use "$inputs/experiment_data.dta", clear

	* Give the binary moderators the same high_* name as the other moderators.
	* high_ses, high_Pmath, high_Ppatien and high_teaching_career_before are
	* expected to exist in the dataset already.
	gen high_int_teach = int_teach
	gen high_female = female
	gen high_knows_changes = knows_changes

	* One row per moderator: b0 se0 p0 b1 se1 p1 p(equality)
	tempname het rt
	matrix `het' = J(7, 7, .)

	local row = 1
	foreach var in female ses Pmath Ppatien knows_changes teaching_career_before int_teach {
		forvalues g = 0/1 {
			* suest does not accept models fitted with a robust or cluster
			* VCE, so a plain fit is stored for it first
			qui reg `outcome' treatment school_admin i.strata if high_`var' == `g'
			est store `var'`g'

			* Clustered fit: source of the reported coefficient, SE and p-value
			qui reg `outcome' treatment school_admin i.strata if high_`var' == `g', vce(cluster cod_mod)
			matrix `rt' = r(table)
			local cb = 3 * `g' + 1
			local cs = 3 * `g' + 2
			local cp = 3 * `g' + 3
			matrix `het'[`row', `cb'] = `rt'[1,1]
			matrix `het'[`row', `cs'] = `rt'[2,1]
			matrix `het'[`row', `cp'] = `rt'[4,1]
		}
		* NOTE(review): suest is run without vce(cluster cod_mod), unlike the
		* group regressions above. Left unchanged so reported p-values do not
		* move; confirm this is intended.
		suest `var'0 `var'1
		test [`var'0_mean]treatment = [`var'1_mean]treatment
		matrix `het'[`row', 7] = r(p)
		local ++row
	}

	* Turn the matrix into display strings
	clear
	svmat `het', names(h)
	gen order = _n

	forvalues g = 0/1 {
		local cp = 3 * `g' + 3
		gen stars`g' = "*" if h`cp' < 0.1
		replace stars`g' = "**" if h`cp' < 0.05
		replace stars`g' = "***" if h`cp' < 0.01
	}

	* Two rows per moderator: 1 = coefficient, 2 = standard error
	expand 2
	bysort order: gen order_no = _n

	gen OLS = cond(order_no == 1, string(h1, "%20.3fc") + stars0, "(" + string(h2, "%20.3fc") + ")")
	gen OLS2 = cond(order_no == 1, string(h4, "%20.3fc") + stars1, "(" + string(h5, "%20.3fc") + ")")
	gen p_value = string(h7, "%20.2fc") if order_no == 1

	sort order order_no
	keep OLS OLS2 p_value
	order OLS OLS2 p_value
	gen order = _n
end

* Table 3.1: Heterogeneity in Treatment Effects on Education Major
het_effects education

gen var_name = ""
replace var_name = "\midrule Sex [Male - Female]" in 1
replace var_name = "Socio-Economic Level [Low - High]" in 3
replace var_name = "Mathematics [Low - High]" in 5
replace var_name = "Patience [Low - High]" in 7
replace var_name = "Had Prior Information About Changes [No - Yes]" in 9
replace var_name = "Opinion of Teaching Career [Low - High]" in 11
replace var_name = "Intention to Teach [No - Yes]" in 13
order var_name

tempfile het_educ
save `het_educ', replace

* Table 3.2: Heterogeneity in Treatment Effects on Tertiary Education
het_effects studied

rename p_value p_value2
rename OLS OLS3
rename OLS2 OLS4

* Put both panels side by side; sort explicitly so the row order of the
* exported table does not depend on how merge leaves the data
merge 1:1 order using `het_educ', nogen
sort order
drop order

* Empty spacer columns between the label and each panel
gen break = ""
gen break2 = ""

order var_name break OLS OLS2 p_value break2 OLS3 OLS4 p_value2

* Collapse each row into one LaTeX line
egen latex = concat(*), p("&")
replace latex = latex + "\\"
keep latex

insobs 5, before(1)
replace latex = "\begin{tabular}{lcccccccc}" in 1
replace latex = "\toprule Outcome && \multicolumn{3}{c}{Studying an} && \multicolumn{3}{c}{Enrolling in} \\" in 2
replace latex = "&& \multicolumn{3}{c}{education-related major} && \multicolumn{3}{c}{tertiary education} \\" in 3
replace latex = "\cline{1-1} \cline{3-5} \cline{7-9} Categories [Group 1 - Group 2] && Group 1 & Group 2 & p-value && Group 1 & Group 2 & p-value \\" in 4
replace latex = "&& (1) & (2) & (3) && (4) & (5) & (6) \\" in 5

insobs 2, after(19)
replace latex = "\bottomrule & & & & & & & & \\" in 20
replace latex = "\end{tabular}" in 21

* Write the table line by line
cap file close myfile
file open myfile using "$results/Tables/heterogeneity_effects.tex", write replace

forvalues i = 1/`=_N' {
	local t0 = latex[`i']
	file write myfile `"`t0'"' _n
}

file close myfile

********************************************************************************
* Table 4: Comparison of Students Who Chose Education vs. Other Majors
********************************************************************************
* Compares students who chose education (education == 1) with students in
* other majors (education == 0) using two-sample t tests, and writes the
* result to $results/Tables/comparison_educ_others.tex.
use "$inputs/experiment_data.dta", clear

* Data prep: rebuild the institution-type indicators from the enrolment flags
* (a missing flag counts as 0 in these expressions)
cap drop pub univ
gen pub = (i_est == 1 | u_est == 1)
gen univ = (u_pri == 1 | u_est == 1)

label var pub "Public Institution"
label var univ "University"

local where "pub univ"
local how "private scholarship time1 studied_another"
local jobs "works wage"
global names "`where' `how' `jobs'"

* One row per variable plus a last row for the group sizes, so the matrix
* always has exactly as many rows as there are row names
local n_vars : word count $names
local n_rows = `n_vars' + 1

* Estimation of statistics
mat define tmat = J(`n_rows', 3, .)

local i = 1
foreach var in $names {
	ttest `var', by(education)
	matrix tmat[`i',1] = r(mu_1)	// education == 0 (other majors)
	matrix tmat[`i',2] = r(mu_2)	// education == 1
	matrix tmat[`i',3] = r(p)
	local ++i
}

qui count if education == 1
local NT = r(N)
qui count if education == 0
local NC = r(N)

matrix tmat[`n_rows',1] = `NC'
matrix tmat[`n_rows',2] = `NT'

matrix rownames tmat = $names N
matrix colnames tmat = Other Education "P-Value"
mat list tmat, format(%9.2f)

* Group sizes for the Observations row, with thousands separators
local obs_other = strtrim(string(`NC', "%12.0fc"))
local obs_educ = strtrim(string(`NT', "%12.0fc"))

* Export to tex
* Keep only the variable rows; the Observations line is written from the
* locals above.
svmat tmat
keep in 1/`n_vars'
local cols "Other Education Pvalue"
forvalues c = 1/3 {
	local nm : word `c' of `cols'
	gen `nm' = string(tmat`c', "%9.2f")
}
drop tmat*

* Row labels come from the variable labels stored in the dataset
gen Variable = ""
local i = 1
foreach var in $names {
	local lab: variable label `var'
	replace Variable = "`lab'" in `i'
	local ++i
}

label var Variable ""
label var Other "Other Majors"
label var Education "Education"
label var Pvalue "P-value"
keep Variable Other Education Pvalue
order Variable Other Education Pvalue

* Collapse each row into one LaTeX line
egen latex = concat(*), p("&")
replace latex = latex + "\\"
keep latex

* Header rows and panel titles. Positions are absolute row numbers and assume
* the current list lengths (2 / 4 / 2 variables per panel).
insobs 3, before(1)
replace latex = "\begin{tabular}{lccc}" in 1
replace latex = "\toprule & Other Majors & Education & p-value \\" in 2
replace latex = "\midrule \multicolumn{4}{c}{A. Where Do They Study} \\ \midrule" in 3

insobs 1, before(6)
insobs 1, before(11)
replace latex = "\midrule \multicolumn{4}{c}{B. How Do They Study} \\ \midrule" in 6
replace latex = "\midrule \multicolumn{4}{c}{C. Job Market} \\ \midrule" in 11

insobs 3, after(13)
replace latex = "\midrule Observations & `obs_other' & `obs_educ' & \\" in 14
replace latex = "\bottomrule & & & \\" in 15
replace latex = "\end{tabular}" in 16

* Write the table line by line
cap file close myfile
file open myfile using "$results/Tables/comparison_educ_others.tex", write replace

forvalues i = 1/`=_N' {
	local t0 = latex[`i']
	file write myfile `"`t0'"' _n
}

file close myfile
